% Matlab code to estimate a mixed logit model with maximum simulated likelihood
% Written by Kenneth Train, first version July 19, 2006, 
% latest edits August 9, 2006
% Discounting added by Heather Snell May 17, 2019
% Last edits December 18, 2020

clear all
estimate=1; % set estimate=0 if you just want to display the loglik for start values

% Declare GLOBAL variables
% GLOBAL variables are all in caps
% DO NOT CHANGE ANY OF THESE 'global' STATEMENTS
global NP NCS NROWS printest ANA
global IDV NV NAMES B W
global IDF NF NAMESF F
global DRAWTYPE NDRAWS SEED1 SAVEDR PUTDR
global WANTWGT IDWGT WGT
global NALTMAX NCSMAX
global XX DR T 
global XMAT X
global NMEM NTAKES NPARAM
global MDR discountfun FirstRun modelname
format shortG
discountfun="Exponential";
printest=1;
modelname='dmixl.exp.mat';
ANA=0;        %set to zero for regular models instead of ANA models
printest = 1; %set to one to print out estimate each iteration, 0 if not
T = 65;       %This is the number of time periods for discouting
FirstRun = 1; %later this will change to zero after data is loaded

%Price is divided by 10 in the code below

% OUTPUT FILE
% Put the name you want for your output file (including full path if not the current 
% working directory) after words "delete" and "diary".
% The 'diary off' and 'delete filename' commands close and delete the previous version 
% of the file created during your current matlab session or in any previous sessions. 
% If you want to append the new output to the old output, then 
% put % in front of the 'diary off' and 'delete filename' commands (or erase them).

diary off
delete dmixl.exp.out
diary dmixl.exp.out

% TITLE
% Put a title for the run in the quotes below, to be printed at the top of the output file.
disp 'Discounted Mixed logit with 1000 Halton draws: lognormal scale, all others normal.';
disp 'Dicouting type';
discountfun
% DATA
% Load and/or create XMAT, a matrix that contains the data.
% We initially converted csv file to .mat file

% XMAT = csvread("discounting.data.csv", 1);
% save('discounting.data.mat','XMAT');
load('discounting.data.mat');  %This loads XMAT with the variables below

Nalts = 3; %number of alts per question
Nquest = 5; %number of questions each person answers
NROWS = size(XMAT, 1); % This is the number of rows of data in XMAT below.
NP = NROWS/(Nalts*Nquest); %Number of people in dataset
NCS = NP * Nquest; %Number of choice situations in dataset

% XMAT must contain one row of data for each alternative in each choice situation for each person.
% The rows are grouped by person, and by choice situations faced by each person.
% The number of rows in XMAT must be NROWS, specified above.
% The columns in XMAT are variable that describe the alternative.
% 
% The *first* column of XMAT identifies the person who faced this alternative. 
% The people must be numbered sequentially from 1 to NP, in ascending order.
% All alternatives for a given person must be grouped together.
% The *second* column of XMAT identifies the choice situation. The choice
% situations must be numbered sequentially from 1 to NCS.
% All alternatives for a given choice situation must be grouped together.
% The *third* column of XMAT identifies the chosen alternatives (1 for
% chosen, 0 for not). One and only one alternative must be chosen for each
% choice situation.
% The remaining columns of XMAT can be any variables.

%XMAT=load('data.txt');  %The variables are described below

% 1. Person number (1-NP)            MUST BE THIS. DO NOT CHANGE.
% 2. Choice situation number (1-NCS) MUST BE THIS. DO NOT CHANGE.
% 3. Chosen alternative (1/0)        MUST BE THIS. DO NOT CHANGE.
% 4. Price 
% 5. CAP
% 6. SW
% 7. Buffer
% 8. Quality
% 9. Jobs
% 10. Infra
% 11. Wlife
% 12. treatment
% 13. Rate  %Actually just all ones so it can be used for beta as well

% MODEL SPECIFICATION

% RANDOM COEFFICIENTS
% List the variables in XMAT that enter the model with random coefficients and
% give the distribution for the coefficient of each variable.
% IDV contains one row for each random coefficient and two columns.
% The *first* column gives the number of a variable in XMAT that has a random coefficient, 
% and the *second* column specifies the distribution of the coefficient for that variable.
% The distributions can be 
% 1. normal: N(b,w^2) where mean b and standard deviation w are estimated.
% 2. lognormal: coefficient is exp(beta) where beta~N(b,w^2) with b and w estimated
% 3. truncated normal, with the share below zero massed at zero: max(0,beta) where 
%                      beta~N(b,w^2) with b and w estimated.
% 4. S_B: exp(beta)/(1+exp(beta))  where beta~N(b,w^2) with b and w estimated.
% 5. normal with zero mean (for error components): N(0,w^2) where w is estimated.
% 6. triangular: b+w*t where t is triangular between -1 and 1 and mean b and spread w are estimated.
% If no random coefficients, put IDV=[];
% Notes:
% The lognormal, truncated normal, and S_B distributions give positive
% coefficients only. If you want a variable to have only negative coefficients, 
% create the negative of the variable (in the specification of XMAT above).
% The S_B distribution gives coefficients between 0 and 1. If you want
% coefficients to be between 0 and k, then multiply the variable by k (in the specification 
% of XMAT above), since b*k*x for b~(0-1) is the same as b*x for b~(0-k).
% If no random coefficients, put IDV=[];
 
IDV=[  4 2;   ...
       5 1;   ...
       6 1;   ...
       7 1;   ...
       8 1;   ...
       9 1;   ... 
      10 1;   ...
      11 1;   ...
      13 1];

NV=size(IDV,1); %Number of random coefficients. Do not change this line.

% Give a name to each of the explanatory variables in IDV. They can 
% have up to ten characters including spaces. Put the names in single quotes and separate 
% the quotes with semicolons. If IDV=[], then set NAMES=[];
NAMES={'PriceScale';'ASC.CAP';'ASC.SW';'Buffer';'Infra';'Jobs';'Quality';'Wlife';'Rate'}; 

% Starting values
% Specify the starting values for b and w for each random coeffient.
% B contains the first parameter, b, for each random coefficient.  
% It is a column vector with the same length as IDV. For distribution 5 (normal with zero mean),
% put 0 for the starting value for the mean. The code will keep it at 0.
% W contains the second parameter, w, for each random coefficient.
% It is a column vector with the same length as IDV.
% Put semicolons between the elements of B and W (so they will be column vectors).

B=[ -3.75; 0.2; 0.1; 0.93; -0.849; 0.818; 1.372; -0.452; 0.5474];
W=[     5; 4.5;	  4;  1.2;    6.1;  1.25;   5.6;    7.9;  0.001];

% load into global environment for quicker use
if FirstRun==1
    pricetime = csvread('price.time.csv', 1)./10;
    captime = csvread('CAP.time.csv', 1);
    swtime = csvread('SW.time.csv', 1);
    buffertime = csvread('buffer.time.csv', 1);
    jobstime = csvread('jobs.time.csv', 1);
    infratime = csvread('infra.time.csv', 1);
    wlifetime = csvread('wlife.time.csv', 1);
    qualitytime = csvread('quality.time.csv', 1);
    FirstRun=0;
end

% FIXED COEFFICIENTS
% List the variables in XMAT that enter with fixed coefficients.
% Put semicolons between the numbers.
% If no fixed coefficients, put IDF=[];

IDF=[];

NF=size(IDF,1); %Number of fixed coefficients. Do not change this line.

% Give a name to each of the variables in IDF.
%NAMESF={'Beta'};
NAMESF=[];

% Starting values.
% Specify the starting values for the fixed coefficients F.
% F must have the same length as IDF and have one column.
% Put semicolons between the elements (so F will be a column vector.)
%F=[0];
F=[];
% Type of draws to use in simulation
% 1=pseudo-random draws
% 2=standard Halton draws
% 3=shifted and shuffled Halton draws
% 4=modified Latin hypercube sampling, shifted and shuffled 
% 5=create your own draws or load draws from file
DRAWTYPE=2;

% Number of draws from to use per person in simulation.
NDRAWS=1000;

% Set seed for the random number generator.
SEED1 = 14239; 

% If DRAWTYPE=5, then create or load the draws here.
% Create or load a data array, called DR, with dimensions NV x NP x NDRAWS.
% where element DR(i,j,k) is the k-th draw of random coefficient i for person j 
% Put your statements between "if DRAWTYPE==5" and "end".
% 
% If you created DR in a previous matlab session and saved it
% with "save mydraws DR" then put "load('mydraws.mat')" here. The structure
% mydraws will contain the array DR.
% Note: If you want to use the draws that were saved to PUTDR below in a 
% previous run, see the ReadMe.txt file for instructions.

if DRAWTYPE==5
   load('mydraws.mat');
end


% Memory use
% Give the number of draws that you want held in memory at one time.
% This number must be evenly divisible into the number of draws.
% That is NDRAWS./NMEM must be a positive integer.
% To hold all draws in memory at once, set NMEM=NDRAWS.
% A larger value of NMEM requires fewer reads from disc but 
% uses more memory which can slow-down the calculations and increases 
% the chance of running out of memory.
% If DRAWTYPE=5, then you must set NMEM=NDRAWS
NMEM=1000;

% If all the draws are NOT held in memory at one time (that is, if NMEM<NDRAWS), 
% then give the filename (including full path if not in the working directory)
% that you want the draws to be temporarily saved to while the code is running.
% If all draws are held in memory at one time (that is, if NMEM=NDRAWS),
% then this file will not be created. So, if NMEM=NDRAWS, you can set PUTDR=''; 
% or give a file name, whichever you find more convenient, since the name won't be used.
PUTDR='draws';

% WEIGHTS. 
% Do you want to apply weights to the people? 
% Set WANTWGT=1 if you want to apply weights; otherwise set WANTWGT=0;
WANTWGT=0;

% If WANTWGT=1, identify the variable in XMAT that contains the weights.
% This variable can vary over people but must be the same for all rows of
% data for each person. Weights cannot vary over choice situations for
% each person or over alternatives for each choice situation -- only over people.
% The code normalizes the weights such that the sum 
% of weights over people is to equal NP (to assure that standard errors 
% are correctly calculated.) If WANTWGT=0, set IDWGT=[];
IDWGT=[];

% OPTIMIZATION 
% Maximum number of iterations for the optimization routine.
% The code will abort after ITERMAX iterations, even if convergence has
% not been achieved. The default is 400, which is used when MAXITERS=[];
MAXITERS=5000;%[];

% Convergence criterion based on the maximum change in parameters that is considered
% to represent convergence. If all the parameters change by less than PARAMTOL 
% from one iteration to the next, then the code considers convergence to have been
% achieved. The default is 0.000001, which is used when PARAMTOL=[];
PARAMTOL=0.000001; %start with this, then decrease and use previous estimate as start.

% Convergence criterion based on change in the log-likelihood that is
% considered to represent convergence. If the log-likelihood value changes
% less than LLTOL from one iteration to the next, then the optimization routine
% considers convergence to have been achieved. The default is 0.000001,
% which is used when LLTOL=[];
LLTOL=0.000001;%[]; again, change after first estimation

%%%%%%%%% I moved this portion out of the doit script.
%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This script calls check to check the data and input specifications, transforms the data
% into a more easily useable form, calls estimation routine, and prints out results.
% Written by Kenneth Train on July 19, 2006, and latest edits on Sept 24, 2006.


% Check the input data and specifications

disp('Checking inputs.');
ok=check;
if ok == 1;
    disp('Inputs have been checked and look fine.');
else
    return;
end

% Create Global variables to use in estimation

disp('Creating data arrays for run.');
cp=XMAT(:,1); % person
 
nn=zeros(NCS,1);
for n=1:NCS;
    nn(n,1)=sum(XMAT(:,2) == n,1);
end;
NALTMAX=max(nn);  %Maximum number of alternatives in any choice situation

nn=zeros(NP,1);
for n=1:NP;
   k=(XMAT(:,1)==n);%rows for person n
   k=XMAT(k,2); %choice sets for person n
   nn(n,1)=1+k(end,1)-k(1,1);
end;
NCSMAX=max(nn);  %Maximum number of choice situations faced by any person

NTAKES=NDRAWS./NMEM; %This tells code how many passes through the draws are needed 
                           % given NMEM people in each pass.
if WANTWGT == 1
    WGT=zeros(1,NP);
    for r=1:NP
        WGT(1,r)=mean(XMAT(cp == r,IDWGT),1);
    end
    WGT=WGT.*(NP./sum(WGT,2));
else
    WGT=[];
end

% Data arrays 
% All variables are differenced from the chosen alternative
% Only nonchosen alternatives are included, since V for chosen alt =0
% This reduces number of calculations for logit prob and eliminates need to
% retain dependent variable.



XX=zeros(NALTMAX-1,NCSMAX,NV,NP); % Explanatory variables with random coefficients 
%                                  for all choice situations, for each person 
S=zeros(NALTMAX-1,NCSMAX,NP); % Identification of the alternatives in each choice situation, for each person
% S is only used if draws are broken into multiple "takes".

%Create draws

if DRAWTYPE ~= 5
   disp('Creating draws.');
   DR=makedraws;   %NMEMxNPxNV
   if NTAKES == 1
      DR=permute(DR,[3,2,1]);   %To make NVxNPxNDRAWS
   else 
      MDR=memmapfile(PUTDR,'Format',{'double',[NDRAWS,NP,NV],'drs'});
   end
end

if NV>0 & NF>0
   param=[F;B(IDV(:,2)~=5,1);W];
elseif NV>0 & NF==0
   param=[B(IDV(:,2)~=5,1);W];
elseif NV==0 & NF>0;
   param=F;
else
   disp('Model has no explanatory variables.');
   disp('IDV and IDF are both empty.');
   disp('Program terminated.');
   return
end


%%% subtract chosen X from non-choosen X for each choice set

X = zeros(NV-1, NROWS, T+1);
X(1,:,:) = pricetime;
X(2,:,:) = captime;
X(3,:,:) = swtime;
X(4,:,:) = buffertime;
X(5,:,:) = infratime;
X(6,:,:) = jobstime;
X(7,:,:) = qualitytime;
X(8,:,:) = wlifetime;


% XX will now have data matrix for non-chosen minus chosen alternatives
XX=zeros(NV-1, NALTMAX-1,NCSMAX,NP,T+1); % Explanatory variables with random coefficients 
%                                  for all choice situations, for each person 
S=zeros(NALTMAX-1,NCSMAX,NP); % Identification of the alternatives in each choice situation, for each person
% S only needed if you break up NDRAWS to so max value

for n=1:NP  %loop over people
 cs=XMAT(cp == n,2);
 yy=XMAT(cp == n,3);
 if NV > 0
    xx=X(:,cp == n, :); % 8 by 15 by 66
 end
 t1=cs(1,1);
 t2=cs(end,1);
 for t=t1:t2 %loop over choice situations
     k=sum(cs==t)-1; %One less than number of alts = number of nonchosen alts
     S(1:k,1+t-t1,n)=ones(k,1);
     XX(:,1:k,1+t-t1,n,:)=bsxfun(@minus, xx(:,cs==t & yy == 0,:), xx(:,cs==t & yy == 1,:));
  end
end


if(estimate==1)
    doit;
else
    [ll]=logliknog(param);
    disp(ll)
end

% These last lines delete the file of draws that is created when NMEM<NDRAWS
% since it is no longer needed. If you want to save it, then put % in front
% of these lines.
if NMEM<NDRAWS
    clear global MDR
    delete(PUTDR)
end

diary off;